// Table 1 & Figure 2 


// Load Data
// Read all_tasklevel_data.dta and discard observations with missing sex.
use "all_tasklevel_data.dta", clear
drop if missing(sex)

// Value labels for the two decile-coded variables. Codes 1-10 share the same
// ordinal prefix, so both label sets are built one value at a time in a loop.
local ordinals 1st 2nd 3rd 4th 5th 6th 7th 8th 9th 10th
forvalues d = 1/10 {
	local ord : word `d' of `ordinals'
	label define dec_  `d' "`ord' Decile Break", add
	label define dec_2 `d' "`ord' Decile Session Task", add
}
// dec_ carries one extra code (11) beyond the ten deciles.
label define dec_ 11 "First Task of Day", add

label values abs_tdiff_group dec_
label values cluster10_group dec_2

label var same_task "Same Task at t-1"

// Calendar-time controls.
//
// time_group: running index of (year, month) cells. egen's group() numbers
// cells by the sorted values of its varlist in the order the variables are
// listed, so year_a must come first for consecutive indices to be consecutive
// calendar months. (With month_a first, index 1, 2, 3, ... would run through
// every January before reaching any February.)
sort year_a month_a
egen time_group = group(year_a month_a)

// quarter_group: three-month bins of time_group, coded by the first index of
// each bin (1, 4, 7, ..., 37); 0 for observations matched by no bin.
// The window is written as [i, i+3], so adjacent windows share one index; the
// later iteration overwrites the earlier one, which leaves bins {1,2,3},
// {4,5,6}, ... and lets the last bin (37) also absorb index 40 if it exists.
// inrange() is false for a missing time_group, so such rows keep the value 0.
gen quarter_group = 0
forvalues i = 1(3)39 {
	replace quarter_group = `i' if inrange(time_group, `i', `i'+3)
}

// overlap = 1 when time_diff is negative, else 0 (a missing time_diff compares
// as larger than 0 in Stata, so it also yields 0).
g overlap = (time_diff < 0)

// Full Sample
// Two specifications of log_interval_wage on sex, estimated on
// in_sample==1 & returned==0 with standard errors clustered on worker_id:
//   work_1 : task-level controls + quarter_group dummies
//   work_2 : work_1 + work-pattern controls + time_of_day dummies

set more off

// Regressor blocks, kept in the same order as they enter each model.
local task_rhs    sex log_task_reward i.task_type_ log_worker_n
local pattern_rhs i.abs_tdiff_group overlap i.cluster10_group same_task
local est_sample  in_sample==1 & returned==0

// (1) task controls only
reg log_interval_wage `task_rhs' i.quarter_group ///
	if `est_sample', vce(cluster worker_id)
estimates store work_1
estadd local task_control "yes"
estadd local work_control "no"

// (2) task controls plus work-pattern controls
reg log_interval_wage `task_rhs' `pattern_rhs' i.quarter_group i.time_of_day ///
	if `est_sample', vce(cluster worker_id)
estimates store work_2
estadd local task_control "yes"
estadd local work_control "yes"

// Batch v No Batch tasks
// Re-estimate both specifications separately for no_batch==0 and no_batch==1,
// storing them as work_<spec>_<no_batch>, then export the coefficient on sex
// from all six stored models to merged_work_pattern.tex.

local task_rhs    sex log_task_reward i.task_type_ log_worker_n
local pattern_rhs i.abs_tdiff_group overlap i.cluster10_group same_task
local est_sample  in_sample==1 & returned==0

foreach nb of numlist 0/1 {

	// Specification 1: task controls only
	reg log_interval_wage `task_rhs' i.quarter_group ///
		if `est_sample' & no_batch==`nb', vce(cluster worker_id)
	estimates store work_1_`nb'
	estadd local task_control "yes"
	estadd local work_control "no"

	// Specification 2: task controls plus work-pattern controls
	reg log_interval_wage `task_rhs' `pattern_rhs' i.quarter_group i.time_of_day ///
		if `est_sample' & no_batch==`nb', vce(cluster worker_id)
	estimates store work_2_`nb'
	estadd local task_control "yes"
	estadd local work_control "yes"

}

// Only the sex coefficient is kept; p-values are reported beneath it.
esttab work_1 work_2 work_1_0 work_1_1 work_2_0 work_2_1 ///
	using "merged_work_pattern.tex" , ///
	b(4) p(4) label  replace nomtitles booktabs  star(* 0.10 ** 0.05 *** 0.01) nodepvars   ///
	nonotes noconstant keep(sex)


// Batch v No Batch tasks - cross-model p-values.
// The same six regressions are re-estimated WITHOUT a vce() option and stored
// under the same names; clustering is applied only inside suest, which then
// supplies the joint covariance used by the cross-equation tests on sex.
eststo clear

local task_rhs    sex log_task_reward i.task_type_ log_worker_n
local pattern_rhs i.abs_tdiff_group overlap i.cluster10_group same_task
local est_sample  in_sample==1 & returned==0

// Full sample
reg log_interval_wage `task_rhs' i.quarter_group ///
	if `est_sample'
estimates store work_1

reg log_interval_wage `task_rhs' `pattern_rhs' i.quarter_group i.time_of_day ///
	if `est_sample'
estimates store work_2

// By no_batch status
foreach nb of numlist 0/1 {

	reg log_interval_wage `task_rhs' i.quarter_group ///
		if `est_sample' & no_batch==`nb'
	estimates store work_1_`nb'

	reg log_interval_wage `task_rhs' `pattern_rhs' i.quarter_group i.time_of_day ///
		if `est_sample' & no_batch==`nb'
	estimates store work_2_`nb'

}

// NOTE(review): the cluster variable here is worker_nid, whereas the tabulated
// regressions cluster on worker_id - presumably a numeric copy of the same
// identifier; confirm against the data.

// Does the sex coefficient differ between specification 1 and specification 2?
suest work_1 work_2, vce(cluster worker_nid)
test [work_1_mean]sex = [work_2_mean]sex

// Within each specification, does it differ between no_batch==0 and no_batch==1?
foreach spec of numlist 1/2 {
	suest work_`spec'_0 work_`spec'_1, vce(cluster worker_nid)
	test [work_`spec'_0_mean]sex = [work_`spec'_1_mean]sex
}

  
// Figure 2 Gelbach decomposition
//
// Step 1: expand each categorical control into indicator variables. The first
// indicator of every set is renamed with a leading underscore so that the
// indic_<var>* wildcards used below no longer match it, i.e. the first
// category acts as the omitted base.
foreach thing in task_type_ cluster10_group abs_tdiff_group ///
				same_task time_of_day quarter_group {
	qui tab `thing', g(indic_`thing')
	ren indic_`thing'1 _indic_`thing'1
}	

// Step 2: decompose the change in the sex coefficient between the base model
// (x1all) and the full model (x1all + x2all) into the groups named in x2delta.
// same_task is listed in x2all so that the full model contains every variable
// that x2delta assigns to a group and lines up with the covariates of the
// work_2 regression (it was previously referenced only in pattern_group3).
// The quarter_group indicators are in x2all but deliberately in no x2delta
// group, as in the original specification.
b1x2 log_interval_wage if in_sample==1 & returned==0, x1all(sex) x2all(log_task_reward indic_task_type* ///
		 log_worker_n indic_cluster10_group* ///
		 indic_abs_tdiff_group* overlap same_task indic_time_of_day* indic_quarter_group*) x1only(sex) ///
		 x2delta( task_reward_group = log_task_reward : ///
				  task_type_group = indic_task_type* : ///
				  exp_group = log_worker_n : ///
				  pattern_group1 = indic_cluster10_group* : ///
				  pattern_group2 = indic_abs_tdiff_group* : ///
				  pattern_group2a = overlap : ///
				  pattern_group3 = same_task : ///
				  pattern_group4 = indic_time_of_day*) ///
		 cluster(worker_id) nofull nobase

 
// Pull the decomposition results left in e() and plot the first eight
// components with +/- 1.96 standard-error intervals.
matrix tmp = e(Delta)
matrix tmp2 = e(Covdelta)

// decomp_b: 1 x 8 row of point estimates; CI: row 1 = lower bound, row 2 = upper bound.
matrix decomp_b = J(1, 8, .)
matrix CI = J(2, 8, .)

// Temporary scalars hold the estimate and interval half-width for one component.
tempname point halfwidth
forvalues k = 1/8 {
	scalar `point' = tmp[1,`k']
	scalar `halfwidth' = 1.96*sqrt(tmp2[`k',`k'])
	matrix decomp_b[1, `k'] = `point'
	matrix CI[1, `k'] = `point' - `halfwidth'
	matrix CI[2, `k'] = `point' + `halfwidth'
}

// Axis labels follow the order in which the groups were passed to x2delta().
coefplot (matrix(decomp_b)), ci(CI) xline(0) ///
	xtitle("Contribution to Gender Pay Gap") ///
	graphregion(color(white)) ///
	ylabel(1 "Task Reward" ///
	       2 "Task Type" ///
	       3 "Total Experience" ///
	       4 "Work Session" ///
	       5 "Breaks" ///
	       6 "Overlap" ///
	       7 "Same Task" ///
	       8 "Time of Day")



 
